clear all 
capture log close

global data "/Users/ceweber/Dropbox/JPAM_TIV_code/original_data/"
global dta "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_files/"
global dataclean "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_output/"
global output "/Users/ceweber/Dropbox/JPAM_TIV_code/output/"

set more off
cd "$data"
use "supply_chain_summary.dta", clear

cd "$dataclean" 

gen usableweightwh=usable_weight*weight_original /*this is total weight of the sale*/
gen pricepergramwh=unitprice_wholesale / usable_weight
gen totalsaleswh=unitprice_wholesale*weight_original

*get rid of duplicates/multiple sales of the same item:
egen tag=tag(inventoryid salesorg salesloc)
egen meantag=mean(tag), by(inventoryid salesorg salesloc)
egen  maxprice=max(pricepergramwh), by(inventoryid salesorg salesloc)

drop if meantag<1&pricepergramwh<maxprice
drop tag maxprice meantag
egen tag=tag(inventoryid salesorg salesloc)
drop if tag==0
drop tag

set more off 
*count
*drop if strain==""

cd "$dta" 
mmerge strain using "strain-map-post-cleaning.dta", ukeep(strain_final) type(n:1) unmatched(master) 
rename strain_final strains
*drop strain 
*drop if strains==""

*generate a pre-roll indicator:
gen tmp3=lower(productname)
replace productname=tmp3

gen pre="pre"
gen preroll=strpos(productname, pre)
gen roll="roll"
gen tmp1=strpos(productname, roll)
replace preroll=tmp1 if preroll==0&tmp1>=1&tmp1<.
gen joint="joint"
gen tmp2=strpos(productname, joint)
replace preroll=tmp2 if preroll==0&tmp2>=1&tmp2<.
drop tmp2
gen fattie="fattie"
gen tmp2=strpos(productname, fattie)
replace preroll=tmp2 if preroll==0&tmp2>=1&tmp2<.
drop tmp2
gen smalls="smalls"
gen tmp2=strpos(productname, smalls)
replace preroll=tmp2 if preroll==0&tmp2>=1&tmp2<.
drop tmp2
gen spliff="spliff"
gen tmp2=strpos(productname, spliff)
replace preroll=tmp2 if preroll==0&tmp2>=1&tmp2<.
drop tmp2
gen cone="cone"
gen tmp2=strpos(productname, cone)
replace preroll=tmp2 if preroll==0&tmp2>=1&tmp2<.
drop tmp2
gen pinecone="pinecone"
gen tmp2=strpos(strains, pinecone)
replace preroll=0 if preroll==0&tmp2>=1&tmp2<.

gen pr="pr"
gen tmp4=strpos(productname, pr)
gen pra="pra"
gen tmp5=strpos(productname, pra)
replace tmp4=0 if tmp5>0&tmp5<.
gen pro="pro"
gen tmp6=strpos(productname, pro)
replace tmp4=0 if tmp6>0&tmp6<.
gen prf="prf"
gen tmp7=strpos(productname, prf)
replace tmp4=0 if tmp7>0&tmp7<.
gen spr="spr"
gen tmp8=strpos(productname, spr)
replace tmp4=0 if tmp8>0&tmp8<.
gen pres="pres"
gen tmp9=strpos(productname, pres)
replace preroll=0 if tmp9>0&tmp9<.
gen pri="pri"
gen tmp10=strpos(productname, pri)
replace tmp4=0 if tmp10>0&tmp10<.
drop tmp10
gen prm="prm"
gen tmp10=strpos(productname, prm)
replace tmp4=0 if tmp10>0&tmp10<.
drop tmp10
gen prp="prp"
gen tmp10=strpos(productname, prp)
replace tmp4=0 if tmp10>0&tmp10<.
drop tmp10
gen prem="prem"
gen tmp10=strpos(productname, prem)
replace preroll=0 if tmp10>0&tmp10<.
drop tmp10
replace preroll=tmp4 if tmp4>0&tmp4<.

drop roll-prem pre


gen wholesale=1

rename proclog procloc
keep inventoryid salesorg salesloc pricepergramwh date_wholesale strain productname  type usableweightwh  procloc wholesale potency* totalsaleswh prodorg unitprice_wholesale preroll

summ preroll if type!=28

keep if type==28|(type==31&preroll==1)
count

gen potency_thctotal= potency_thca* 0.877 +potency_thc

cd "$data"
mmerge inventoryid salesorg salesloc using "dispensing.dta", umatch( inventoryid orgid location)   type(1:n) unmatched(both) ukeep(sessiontime transactionid inventorytype weight usableweight price deleted refunded transactionid_original)
  
*egen tag=tag(inventoryid)
*count if tag

*tab inventorytype
keep if inventorytype==28 |(inventorytype==31&preroll==1)/*keep only usable marijuana*/
tab type
drop type
 
*count if tag&wholesale==1
*drop tag

*keep if weightwh<10

count if wholesale==1
drop _merge

drop if deleted==1
drop if refunded==1
drop if pricepergramwh<=.01
drop if totalsaleswh<=.01
drop if unitprice_wholesale<=.01

drop deleted refunded transactionid_original

*convert from %tC format to day (this is day sold from processor to retailer):
gen date=trunc(dofc(sessiontime))
format date %d

gen hour=hh(sessiontime)
drop sessiontime

cd "$dataclean"
compress
mmerge salesloc date using "round_amounts", type(n:1) unmatched(master) umatch(location date) ukeep(chosen_amount)

gen month=month(date)
gen year=year(date)
gen date2=date+1
gen weekly=wofd(date2)
format weekly %tw
gen monthly=mofd(date)
compress
*summ weekly

*convert from %tC format to day (this is day sold from processor to retailer):
gen datewh=trunc(dofc(date_wholesale))
format datewh %d

gen daysfromwholesale=date-datewh 
drop datewh
*summ daysfromwholesale

rename weight items 

/*merge in what we know about tax rates for each store by geo*/
cd "$dta"
compress
mmerge salesloc using "locations-to-tax-locality.dta", ///
umatch(locid) type(n:1) unmatched(master)  
gen timed = dofm(monthly) 
gen quarter = qofd(timed)
mmerge locality_code quarter using "allrates.dta", ///
umatch(locality_code quarter)   type(n:1) unmatched(master)
drop _merge timed quarter state_rate locality_code
cd "$dataclean"

gen TaxChange=0
replace TaxChange=1 if monthly>=666

egen pricemode=mode(price), by(inventoryid usableweight monthly)
rename pricemode pricemode_month
replace price=pricemode_month if abs(price/pricemode_month-10)<=.1
replace price=pricemode_month if  abs(price/pricemode_month-100)<=1
replace price=pricemode_month if  abs(price/pricemode_month-.1)<=.01
replace price=pricemode_month if  abs(price/pricemode_month-.01)<=.001

egen pricemode=mode(price), by(inventoryid usableweight weekly)
replace price=pricemode if abs(price/pricemode-10)<=.1
replace price=pricemode if  abs(price/pricemode-100)<=1
replace price=pricemode if  abs(price/pricemode-.1)<=.01
replace price=pricemode if  abs(price/pricemode-.01)<=.001
rename pricemode pricemode_weekly

egen pricemode=mode(price), by(inventoryid usableweight TaxChange)
replace price=pricemode if abs(price/pricemode-10)<=.1
replace price=pricemode if  abs(price/pricemode-100)<=1
replace price=pricemode if  abs(price/pricemode-.1)<=.01
replace price=pricemode if  abs(price/pricemode-.01)<=.001
drop pricemode
drop pricemode_month
rename pricemode_weekly pricemode

gen titotalsales=chosen_amount*price
gen tipricepergram=titotalsales/usableweight

gen pricepergram=tipricepergram/(1.25*(1+combined_rate))
replace pricepergram=tipricepergram/((1+.37+combined_rate)) if date>=20270

gen totalsales=titotalsales/(1.25*(1+combined_rate))
replace totalsales=titotalsales/((1+.37+combined_rate)) if date>=20270

gen markup=pricepergram/pricepergramwh-1

drop date2 potency_thc potency_thca potency_total wholesale year month 

*clean potency data, drop extreme outliers:
replace potency_thctotal=. if potency_thctotal==0
replace potency_thctotal=. if potency_thctotal>=40

* save cya.dta, replace

*clean weight data, drop weights sig greater than an ounce:
count
drop if usableweight>28.5&usableweight!=.
count

*clean price data, drop extreme prices:
drop if tipricepergram>42&tipricepergram!=.
count
drop if tipricepergram==0
count

*clean wholesale prices:
drop if pricepergramwh>42&pricepergramwh!=.
count

compress
egen tag=tag(hour date salesloc salesorg)
egen tothours=total(tag),by(date salesloc)
drop tag
egen meanhours=mean(hour),by(date salesloc)

cd "$dataclean"
compress
save matched_wholesale_retail_clean.dta, replace

*****1 Collapse

*TAKE CLEANED DATA SET AND COLLAPSE BY RETAIL LOCAION-DAY AND SAVE
cd "$dataclean"

use matched_wholesale_retail_clean.dta, clear

cd "$dta"
mmerge strain using "${strains}strain-map-post-cleaning.dta", ukeep(strain_final) type(n:1) unmatched(master) 
rename strain_final strains
drop strain _merge
describe
 
egen lots=tag(inventoryid date)
egen sale=tag(transactionid) 

gen profit = .
replace profit = (pricepergram - pricepergramwh)*usableweight //if inventorytype==28
//replace profit = (totalsales - unitprice_wholesale) if inventorytype!=28

gen taxrev=titotalsales-totalsales
gen thcweight=usableweight*potency_thctotal
egen tmp=tag(strains salesloc date)
gen strain=tmp

/*
gen price_combine = .
	
	replace price_combine = pricepergram if inventorytype==28
	replace price_combine = totalsales if inventorytype!=28
	
	egen p995 = pctile(price_combine), p(99.5)
	egen p005 = pctile(price_combine), p(00.5)
	
	replace price_combine = . if price_combine>p995
	replace price_combine = . if price_combine<p005
*/
/*
hist profit
graph export profit.eps, as(eps) replace
egen p995 = pctile(profit), p(99.5)
sum p995
egen p005 = pctile(profit), p(00.5)
sum p005

replace profit=. if profit>p995
drop if profit<p005
*/


*now collapse at daily-location level:
collapse (sum) sale items profit  usableweight usableweightwh thcweight totalsales titotalsales lots strain (mean) meanhours tothours /*price_combine*/ pricemode pricepergramwh pricepergram tipricepergram markup daysfromwholesale potency_thctotal potency_cbd /*logmeanprice pricepergram_dmean logmeantiprice tipricepergram_dmean*/, by(date salesloc) fast

cd "$dataclean"
save collapsed_retail_wholesale_daily_clean_part1.dta, replace
 
